Question 2
b(i)
library(tidyverse)
#rm(list = ls())
prostate_data = read.table("data/prostate_data.txt")
head(prostate_data)
## lcavol lweight age lbph svi lcp gleason pgg45 lpsa
## 1 -0.5798185 2.769459 50 -1.386294 0 -1.386294 6 0 -0.4307829
## 2 -0.9942523 3.319626 58 -1.386294 0 -1.386294 6 0 -0.1625189
## 3 -0.5108256 2.691243 74 -1.386294 0 -1.386294 7 20 -0.1625189
## 4 -1.2039728 3.282789 58 -1.386294 0 -1.386294 6 0 -0.1625189
## 5 0.7514161 3.432373 62 -1.386294 0 -1.386294 6 0 0.3715636
## 6 -1.0498221 3.228826 50 -1.386294 0 -1.386294 6 0 0.7654678
## train
## 1 TRUE
## 2 TRUE
## 3 TRUE
## 4 TRUE
## 5 TRUE
## 6 TRUE
in_sample_obs = prostate_data %>% as_tibble() %>%
  filter(train == TRUE)
out_of_sample_obs = prostate_data %>% as_tibble() %>%
  filter(train == FALSE)
regopt = function(tuning, Y, X, nitermax = 20) {
  lambda1 = tuning[1]
  lambda2 = tuning[2]
  p = ncol(X)
  beta = rep(0, p)
  errortol = 1e-4
  A = NULL
  if (lambda1 == 0 && lambda2 == 0) {
    # No penalty: the minimiser is the OLS estimate
    beta = ols_beta(Y, X)
  } else {
    # Cyclic coordinate descent on the elastic-net objective
    for (i in 1:nitermax) {
      # Store the previous beta for the convergence check
      beta_prev = beta
      # Update each coefficient in turn, holding the others fixed
      for (l in 1:p) {
        beta[l] = beta_gradient_desc(tuning, Y, X, beta, l)
      }
      # Stop once no coefficient moves by more than the tolerance
      if (max(abs(beta - beta_prev)) < errortol) {
        break
      }
    }
  }
  A = elastic_net_obj_function(tuning, Y, X, beta)
  return(list(errortol = errortol, beta = beta, A = A))
}
ols_beta = function(Y, X) {
  beta_hat = solve(t(X) %*% X) %*% t(X) %*% Y
  return(beta_hat)
}
# Scalar form of the elastic-net objective; equivalent to
# elastic_net_obj_function() below and kept only for reference (unused).
compute_A <- function(Y, X, beta, lambda1, lambda2) {
  residual_sum_squares <- sum((Y - X %*% beta)^2)
  lasso_penalty <- lambda1 * sum(abs(beta))
  ridge_penalty <- lambda2 * sum(beta^2)
  A <- residual_sum_squares + lasso_penalty + ridge_penalty
  return(A)
}
elastic_net_obj_function = function(tuning, Y, X, beta) {
  lambda1 = tuning[1]
  lambda2 = tuning[2]
  one_transpose = t(matrix(1, nrow = length(beta), ncol = 1))
  rss = t(Y - X %*% beta) %*% (Y - X %*% beta)
  lasso_penalty = lambda1 * one_transpose %*% abs(beta)
  ridge_penalty = lambda2 * t(beta) %*% beta
  A = rss + lasso_penalty + ridge_penalty
  return(A)
}
beta_gradient_desc = function(tuning, Y, X, beta, l) {
  lambda1 = tuning[1]
  lambda2 = tuning[2]
  r = Y - X[, -l] %*% beta[-l]  # partial residual, excluding the effect of beta_l
  # Coordinate update: zero the subgradient in beta_l, evaluating sign(beta_l)
  # and the ridge term at the current value of beta_l
  numerator = sum(X[, l] * r) - (lambda1 / 2) * sign(beta[l]) - lambda2 * beta[l]
  denominator = sum(X[, l]^2)
  beta_new = numerator / denominator
  return(beta_new)
}
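The update above evaluates sign(beta[l]) at the current value; since every coefficient starts at 0 and sign(0) = 0 in R, the lasso term only bites once a coefficient has already moved off zero. For comparison, the exact coordinate-descent update for this objective uses soft-thresholding; a minimal sketch (a reference point only, not used in the results below):
soft_threshold = function(z, gamma) {
  sign(z) * max(abs(z) - gamma, 0)
}
beta_soft_update = function(tuning, Y, X, beta, l) {
  lambda1 = tuning[1]
  lambda2 = tuning[2]
  r = Y - X[, -l] %*% beta[-l]  # partial residual excluding beta_l
  z = sum(X[, l] * r)           # unpenalised coordinate score
  # Exact minimiser of the objective in beta_l, others held fixed
  soft_threshold(z, lambda1 / 2) / (sum(X[, l]^2) + lambda2)
}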
# Standardise predictors and response on the training set; with a centred
# response and centred predictors, no intercept is needed.
in_sample_obs_std = scale(in_sample_obs[, -10], center = TRUE, scale = TRUE) %>% as_tibble()
Y_train = as.matrix(in_sample_obs_std$lpsa)
colnames(Y_train) = "lpsa"
X_train = as.matrix(in_sample_obs_std[, -9])
# Execute regopt
tuning <- c(0.1, 0.1)
result <- regopt(tuning, Y_train, X_train, nitermax = 100)
print(result)
## $errortol
## [1] 1e-04
##
## $beta
## [1] 0.58827529 0.24202016 -0.11607918 0.17445798 0.25443318 -0.23146209
## [7] -0.01320665 0.22296661
##
## $A
## lpsa
## lpsa 20.41962
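As an optional cross-check, assuming the glmnet package is available: glmnet minimises RSS/(2n) + lambda * (alpha * ||beta||_1 + (1 - alpha)/2 * ||beta||_2^2), so our (lambda1, lambda2) must be rescaled before the coefficients can be compared; agreement should hold up to convergence tolerance.
library(glmnet)
n_train = nrow(X_train)
lambda_g = 0.1 / (2 * n_train) + 0.1 / n_train  # maps our lambda1 = lambda2 = 0.1
alpha_g = (0.1 / (2 * n_train)) / lambda_g
fit_glmnet = glmnet(X_train, Y_train, alpha = alpha_g, lambda = lambda_g,
                    standardize = FALSE, intercept = FALSE)
as.numeric(coef(fit_glmnet))[-1]  # drop the (zero) intercept; compare with result$beta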
b(ii)
loo_cv = function(Y, X, lambdas, lasso_regression = TRUE) {
  n = nrow(Y)
  p = ncol(X)
  # Set up the matrix of leave-one-out fitted values
  cv_grid = matrix(data = NA, nrow = nrow(X), ncol = length(lambdas))
  # Set up the matrix of squared leave-one-out prediction errors
  cv_error_grid = matrix(data = NA, nrow = nrow(X), ncol = length(lambdas))
  for (i in 1:n) {
    # Leave one observation out
    x_training_set = X[-i, ]
    x_validation_set = X[i, ]
    y_training_set = Y[-i, ]
    y_validation_set = Y[i, ]
    for (j in 1:length(lambdas)) {
      result = NA
      lambda = lambdas[j]
      if (lasso_regression) {
        tuning = c(lambda, 0)
        result = regopt(tuning, y_training_set, x_training_set, nitermax = 20)
      } else {
        tuning = c(0, lambda)
        result = regopt(tuning, y_training_set, x_training_set, nitermax = 20)
      }
      y_hat = x_validation_set %*% result$beta
      cv_grid[i, j] = y_hat
      cv_error_grid[i, j] = (y_validation_set - y_hat)^2
    }
  }
  colnames(cv_grid) = paste0("lambda", lambdas)
  colnames(cv_error_grid) = paste0("lambda", lambdas)
  mse = apply(cv_error_grid, 2, mean)
  return(list(fitted_values = cv_grid, errors = cv_error_grid, mse = mse))
}
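Refitting n times per lambda is affordable here (n = 67 training observations), but for ridge the LOOCV error also has a closed form via the hat matrix H = X (X'X + lambda I)^{-1} X', namely mean(((y - Hy) / (1 - diag(H)))^2), which avoids refitting entirely. A sketch (exact for ridge; the identity does not apply to the lasso):
ridge_loocv_mse = function(Y, X, lambda) {
  H = X %*% solve(t(X) %*% X + lambda * diag(ncol(X)), t(X))
  resid = Y - H %*% Y
  mean((resid / (1 - diag(H)))^2)  # leave-one-out residuals e_i / (1 - h_ii)
}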
# Lambda range [0,5] across 100 points
#lambdas = 10^seq(0,log10(5), length.out=100)
lambdas = seq(0, 5, length.out = 100)
# Cross-validation for Lasso Regression
lasso_loo_cv_result = loo_cv(Y_train, X_train, lambdas, lasso_regression = TRUE)
lasso_reg_fitted_vals = lasso_loo_cv_result$fitted_values
lasso_reg_errors = lasso_loo_cv_result$errors
lasso_reg_mse = lasso_loo_cv_result$mse
optimal_lasso_lambda = lambdas[which.min(lasso_reg_mse)]
optimal_lasso_lambda
## [1] 1.010101
# Lasso Coefficients
tuning = c(optimal_lasso_lambda, 0)
lasso_result <- regopt(tuning, Y_train, X_train, nitermax = 20)
lasso_coefficients = lasso_result$beta
lasso_coefficients
## [1] 0.56923536 0.23815764 -0.10273133 0.16689761 0.24120637 -0.19010916
## [7] -0.00630515 0.19572855
plot(x = lambdas, y = lasso_reg_mse, type = "l",
     main = "Lasso Regression LOOCV", xlab = "lambda", ylab = "CV MSE")

# Cross-validation for Ridge Regression
ridge_loo_cv_result = loo_cv(Y_train, X_train, lambdas, lasso_regression = FALSE)
ridge_reg_fitted_vals = ridge_loo_cv_result$fitted_values
ridge_reg_errors = ridge_loo_cv_result$errors
ridge_reg_mse = ridge_loo_cv_result$mse
optimal_ridge_lambda = lambdas[which.min(ridge_reg_mse)]
optimal_ridge_lambda
## [1] 3.434343
# Ridge Coefficients
tuning = c(0, optimal_ridge_lambda)
ridge_result <- regopt(tuning, Y_train, X_train, nitermax = 20)
ridge_coefficients = ridge_result$beta
ridge_coefficients
## [1] 0.527830561 0.239068732 -0.098759272 0.169576485 0.241391108
## [6] -0.159391458 0.004474402 0.184120639
plot(x = lambdas, y = ridge_reg_mse, type = "l",
     main = "Ridge Regression LOOCV", xlab = "lambda", ylab = "CV MSE")
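As a sanity check, ridge regression with this objective has the closed form beta = (X'X + lambda I)^{-1} X'y; the fixed point of the coordinate scheme above coincides with it, so the two should agree up to the 1e-4 tolerance:
ridge_closed_form = solve(t(X_train) %*% X_train +
                            optimal_ridge_lambda * diag(ncol(X_train)),
                          t(X_train) %*% Y_train)
as.vector(ridge_closed_form)  # compare with ridge_coefficients above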

library(corrplot)
## corrplot 0.92 loaded
corr_matrix = cor(as.matrix(X_train))
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(corr_matrix, method = "color", col = col(200),
         type = "full", order = "original",
         addCoef.col = "white",
         tl.col = "black", tl.srt = 45,
         tl.pos = "lt",
         tl.cex = 0.6, cl.cex = 0.7,
         number.cex = 0.5)

b(iii)
library(boot)
set.seed(123)
bootstrap_lasso_fit = function(data, indices) {
  # Resample rows of the standardised training data
  data = data[indices, ]
  Y_train = as.matrix(data$lpsa)
  colnames(Y_train) = "lpsa"
  X_train = as.matrix(data[, -9])
  # Refit the lasso at the lambda chosen by LOOCV above
  tuning = c(optimal_lasso_lambda, 0)
  lasso_result <- regopt(tuning, Y_train, X_train, nitermax = 20)
  lasso_coefficients = lasso_result$beta
  return(lasso_coefficients)
}
boot_results = boot(data = in_sample_obs_std, statistic = bootstrap_lasso_fit, R = 1000)
bootstrap_estimates <- boot_results$t
boot_results
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = in_sample_obs_std, statistic = bootstrap_lasso_fit,
## R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 0.56923536 -0.0129107952 0.11047961
## t2* 0.23815764 -0.0004787677 0.09306902
## t3* -0.10273133 0.0070653563 0.07072224
## t4* 0.16689761 0.0080934676 0.09351119
## t5* 0.24120637 0.0002915267 0.10626619
## t6* -0.19010916 0.0121471967 0.10396465
## t7* -0.00630515 0.0032717850 0.09706869
## t8* 0.19572855 0.0011523911 0.10114002
compute_ci <- function(bootstrap_estimates, level = 0.95) {
  ci_lower <- apply(bootstrap_estimates, 2, quantile, probs = (1 - level) / 2)
  ci_upper <- apply(bootstrap_estimates, 2, quantile, probs = 1 - (1 - level) / 2)
  return(data.frame(Lower = ci_lower, Upper = ci_upper))
}
# Compute 95% confidence intervals for each coefficient
confidence_intervals <- compute_ci(bootstrap_estimates)
print(confidence_intervals)
## Lower Upper
## 1 0.325635829 0.7635247034
## 2 0.057948344 0.4199194687
## 3 -0.216830483 0.0509321832
## 4 0.004149456 0.3749541289
## 5 0.007029293 0.4393423690
## 6 -0.387586053 0.0004771296
## 7 -0.207443099 0.1875128709
## 8 0.014446424 0.4131771040
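The same percentile intervals can also be read off boot's built-in helper, one coefficient at a time (small differences are possible because boot.ci interpolates order statistics rather than calling quantile()):
boot.ci(boot_results, type = "perc", index = 1)  # 95% percentile CI for the first coefficient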
Question 3
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(coda)
#rm(list = ls())
patient_profiles = read.csv2(file="data/profiles2024.csv", header=TRUE)
patient_profiles = as.matrix(patient_profiles[1:25,])
# Initial settings
n = 25          # patients
T = 7           # time points per patient (note: masks the shorthand T for TRUE)
p = 3           # number of basis knots
iterations = 30000
burn_in = 10000
# Chain starting values
sigma_e2 = 1
sigma_phi2 = 2
phi = matrix(0, nrow = n, ncol = 4)  # intercept + 3 basis coefficients per patient
# Inverse-gamma prior hyperparameters: sigma_e2 ~ IG(i1, i2), sigma_phi2 ~ IG(i3, i4)
i1 = 2
i2 = 0.1
i3 = 2
i4 = 0.001
# Storage for the chains
sigma_e2_chain = numeric(iterations)
sigma_phi2_chain = numeric(iterations)
phi_chain = array(0, dim = c(n, 4, iterations))
# Basis function and design matrix; note b(0) would be NaN (0 * log(0)),
# but |t_j - eta_k| is never zero on this grid of times and knots
b = function(x) { x * log(x) }
eta_k = c(0.5, 10, 25)
t_j = c(0, 3, 7, 14, 21, 28, 42)
X_i = matrix(0, nrow = T, ncol = 4)
X_i[, 1] = 1
for (j in 1:T) {
  X_i[j, 2:4] = b(abs(t_j[j] - eta_k))
}
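For reference, the sampler below draws from the standard conjugate full conditionals implied by the model z_i ~ N(X_i phi_i, sigma_e^2 I_T) with phi_i ~ N(0, sigma_phi^2 I_4); the inverse-gamma priors IG(i1, i2) and IG(i3, i4) on the two variances are an assumption read off the update formulas:
$$
\sigma_e^2 \mid \cdot \sim \mathrm{IG}\!\left(i_1 + \tfrac{nT}{2},\; i_2 + \tfrac{1}{2}\sum_{i=1}^{n}\lVert z_i - X_i\phi_i\rVert^2\right), \qquad
\sigma_\phi^2 \mid \cdot \sim \mathrm{IG}\!\left(i_3 + \tfrac{n(p+1)}{2},\; i_4 + \tfrac{1}{2}\sum_{i=1}^{n}\lVert\phi_i\rVert^2\right),
$$
$$
\phi_i \mid \cdot \sim \mathcal{N}\!\left(\Sigma\,\sigma_e^{-2}X_i^\top z_i,\; \Sigma\right), \qquad
\Sigma = \left(\sigma_e^{-2}X_i^\top X_i + \sigma_\phi^{-2}I_4\right)^{-1}.
$$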
# ------------------------------------- Perform Gibbs sampling -------------------------------------
for (k in 1:iterations) {
  # Update sigma_e2
  patient_sum_of_squares = 0
  for (i in 1:n) {
    patient_sum_of_squares = patient_sum_of_squares + sum((patient_profiles[i, ] - X_i %*% phi[i, ])^2)
  }
  sigma_e2 = 1 / rgamma(1, i1 + n * T / 2, i2 + patient_sum_of_squares / 2)
  # Update sigma_phi2 (note the factor 1/2 on the sum of squares in the rate)
  sum_phi_squares = sum(phi^2)
  sigma_phi2 = 1 / rgamma(1, i3 + n * (p + 1) / 2, i4 + sum_phi_squares / 2)
  # Update each phi_i from its multivariate normal full conditional
  for (i in 1:n) {
    Sigma_phi <- solve((1 / sigma_e2) * t(X_i) %*% X_i + diag(1 / sigma_phi2, 4))
    mu_phi <- Sigma_phi %*% ((1 / sigma_e2) * t(X_i) %*% patient_profiles[i, ])
    phi[i, ] <- mvrnorm(1, mu_phi, Sigma_phi)
  }
  # Store the draws
  sigma_e2_chain[k] <- sigma_e2
  sigma_phi2_chain[k] <- sigma_phi2
  phi_chain[, , k] <- phi
}
# ---------------------- Trace Plots and Convergence Statistics ----------------------------
# Remove burn-in samples
sigma_e2_chain <- sigma_e2_chain[(burn_in+1):iterations]
sigma_phi2_chain <- sigma_phi2_chain[(burn_in+1):iterations]
phi_chain <- phi_chain[, , (burn_in+1):iterations]
# Convergence diagnostics
par(mfrow = c(2, 2), mar = c(2, 2, 2, 1))
plot(as.mcmc(sigma_e2_chain), main = "Trace Plot for sigma_e2", xlab = "Iteration", ylab = "Value")

plot(as.mcmc(sigma_phi2_chain), main = "Trace Plot for sigma_phi2", xlab = "Iteration", ylab = "Value")

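Trace plots alone can be hard to judge; coda also provides numeric diagnostics. A quick sketch on the post-burn-in chains defined above (effective sample size and Geweke's convergence z-score):
effectiveSize(as.mcmc(sigma_e2_chain))   # effective number of independent draws
effectiveSize(as.mcmc(sigma_phi2_chain))
geweke.diag(as.mcmc(sigma_e2_chain))     # z-score comparing early vs late chain segments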
# Convergence diagnostics for phi parameters
par(mfrow = c(25, 4), mar = c(2, 2, 2, 1), oma = c(4, 4, 2, 1), cex.main = 0.7)
for (i in 1:25) {
  for (j in 1:4) {
    plot(as.mcmc(phi_chain[i, j, ]),
         main = paste("phi[", i, ",", j, "]", sep = ""),
         xlab = "Iteration",
         ylab = "Value",
         col.main = "blue",
         cex.main = 0.7)
  }
}
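A 25 x 4 grid of panels typically does not fit on a default graphics device ("figure margins too large"), and plot() on an mcmc object manages its own layout (auto.layout = TRUE by default), which overrides the mfrow setting above. A sketch of an alternative that pages through plain base-R traces in 4 x 4 blocks:
par(mfrow = c(4, 4), mar = c(2, 2, 2, 1))
for (i in 1:25) {
  for (j in 1:4) {
    # Plain trace of the post-burn-in draws for phi[i, j]
    plot(phi_chain[i, j, ], type = "l", xlab = "", ylab = "",
         main = paste0("phi[", i, ",", j, "]"), cex.main = 0.7)
  }
}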
par(mfrow = c(1, 1)) # Reset to default
# Extract posterior means and 95% credible intervals for patient 15
posterior_means <- apply(phi_chain[15, , ], 1, mean)
credible_intervals <- apply(phi_chain[15, , ], 1, quantile, probs = c(0.025, 0.975))
# ---------------------------------- Analyses of the 15th Observed Profile ----------------------------------
# Plot the 15th observed profile against the posterior fit
observed_profile <- patient_profiles[15, ]
predicted_profile <- X_i %*% posterior_means
predicted_profile_lower <- X_i %*% credible_intervals[1, ]
predicted_profile_upper <- X_i %*% credible_intervals[2, ]
plot(t_j, observed_profile, type = 'b', col = 'red',
     ylim = c(min(observed_profile, predicted_profile_lower),
              max(observed_profile, predicted_profile_upper)),
     ylab = 'z_ij', xlab = 't_j')
lines(t_j, predicted_profile, type = 'b', col = 'black')
lines(t_j, predicted_profile_lower, type = 'b', col = 'blue', lty = 2)
lines(t_j, predicted_profile_upper, type = 'b', col = 'blue', lty = 2)
legend("topleft",
       legend = c("Observed Profile", "Predicted Profile", "95% CI Lower", "95% CI Upper"),
       col = c("red", "black", "blue", "blue"),
       lty = c(1, 1, 2, 2), pch = c(1, 1, NA, NA), pt.cex = 1, cex = 0.8)
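Note that the band above maps the componentwise 2.5%/97.5% quantiles of phi through X_i, which need not coincide with a pointwise credible band for the fitted curve itself. A sketch of the pointwise version, pushing every retained draw of phi_15 through X_i and taking quantiles at each time point:
fitted_draws = X_i %*% phi_chain[15, , ]  # T x n_draws matrix of fitted curves
pointwise_band = apply(fitted_draws, 1, quantile, probs = c(0.025, 0.975))
lines(t_j, pointwise_band[1, ], col = "darkgreen", lty = 3)
lines(t_j, pointwise_band[2, ], col = "darkgreen", lty = 3)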
